{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Code to read mbox file (downloaded from google)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Code from https://stackoverflow.com/questions/59681461/read-a-big-mbox-file-with-python/59682472#59682472"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import email\n",
    "from email.policy import default\n",
    "from tqdm import tqdm\n",
    "from bs4 import BeautifulSoup #to clean the payload"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install beautifulsoup4\n",
    "!pip install pandas\n",
    "!pip install tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "class MboxReader:\n",
    "    def __init__(self, filename):\n",
    "        self.handle = open(filename, 'rb')\n",
    "        assert self.handle.readline().startswith(b'From ')\n",
    "\n",
    "    def __enter__(self):\n",
    "        return self\n",
    "\n",
    "    def __exit__(self, exc_type, exc_value, exc_traceback):\n",
    "        self.handle.close()\n",
    "\n",
    "    def __iter__(self):\n",
    "        return iter(self.__next__())\n",
    "\n",
    "    def __next__(self):\n",
    "        lines = []\n",
    "        while True:\n",
    "            line = self.handle.readline()\n",
    "            if line == b'' or line.startswith(b'From '):\n",
    "                yield email.message_from_bytes(b''.join(lines), policy=default)\n",
    "                if line == b'':\n",
    "                    break\n",
    "                lines = []\n",
    "                continue\n",
    "            lines.append(line)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Possible keys from message ['X-GM-THRID', 'X-Gmail-Labels', 'Delivered-To', 'Received', 'X-Received',\n",
    " 'ARC-Seal', 'ARC-Message-Signature', 'ARC-Authentication-Results', \n",
    "'Return-Path', 'Received', 'Received-SPF', 'Authentication-Results', \n",
    "'DKIM-Signature', 'X-Google-DKIM-Signature', 'X-Gm-Message-State', \n",
    "'X-Google-Smtp-Source', 'MIME-Version', 'X-Received', 'Date', 'Reply-To',\n",
    " 'X-Google-Id', 'Precedence', 'List-Unsubscribe', 'Feedback-ID', 'List-Id',\n",
    " 'X-Notifications', 'X-Notifications-Bounce-Info', 'Message-ID', 'Subject',\n",
    " 'From', 'To', 'Content-Type']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "possible_keys =  ['X-GM-THRID', 'X-Gmail-Labels', 'Delivered-To', 'Received', 'X-Received',\n",
    " 'ARC-Seal', 'ARC-Message-Signature', 'ARC-Authentication-Results', \n",
    "'Return-Path', 'Received', 'Received-SPF', 'Authentication-Results', \n",
    "'DKIM-Signature', 'X-Google-DKIM-Signature', 'X-Gm-Message-State', \n",
    "'X-Google-Smtp-Source', 'MIME-Version', 'X-Received', 'Date', 'Reply-To',\n",
    " 'X-Google-Id', 'Precedence', 'List-Unsubscribe', 'Feedback-ID', 'List-Id',\n",
    " 'X-Notifications', 'X-Notifications-Bounce-Info', 'Message-ID', 'Subject',\n",
    " 'From', 'To', 'Content-Type']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "329it [00:02, 121.85it/s]C:\\Users\\eivin\\AppData\\Local\\Temp\\ipykernel_20904\\2504974431.py:19: MarkupResemblesLocatorWarning: The input looks more like a filename than markup. You may want to open this file and pass the filehandle into Beautiful Soup.\n",
      "  soup = BeautifulSoup(payload, 'html.parser')\n",
      "560it [00:04, 120.06it/s]\n"
     ]
    }
   ],
   "source": [
    "path = r\".\\All mail Including Spam and Trash.mbox\"\n",
    "\n",
    "mbox = MboxReader(path)\n",
    "\n",
    "MAX_EMAILS = 100\n",
    "current_mails = 0\n",
    "\n",
    "all_mail_contents = \"\"\n",
    "mail_from_arr, mail_date_arr, mail_body_arr = [],[],[]\n",
    "for idx,message in tqdm(enumerate(mbox)):\n",
    "    # print(message.keys())\n",
    "    mail_from = f\"{str(message['From'])}\\n\".replace('\"','').replace('\\n','').strip()\n",
    "    mail_date = f\"{str(message['Date'])}\\n\".replace('\"','').replace('\\n','').strip()\n",
    "    payload = message.get_payload(decode=True)\n",
    "    if payload:\n",
    "        current_mails += 1\n",
    "        if current_mails > MAX_EMAILS:\n",
    "            break\n",
    "        soup = BeautifulSoup(payload, 'html.parser')\n",
    "        body_text = soup.get_text().replace('\"','').replace(\"\\n\", \"\").replace(\"\\t\", \"\").strip()\n",
    "        mail_from_arr.append(mail_from)\n",
    "        mail_date_arr.append(mail_date)\n",
    "        mail_body_arr.append(body_text)\n",
    "        all_mail_contents += body_text + \" \""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame({'From':mail_from_arr, 'Date':mail_date_arr, 'Body':mail_body_arr})\n",
    "df.to_pickle(\"df_mail.pkl\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# write all mail contents to txt\n",
    "with open(\"all_mail_contents.txt\", \"w\", encoding=\"utf-8\") as f:\n",
    "\tf.write(all_mail_contents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load all mail contents\n",
    "with open(\"all_mail_contents.txt\", \"r\", encoding=\"utf-8\") as f:\n",
    "\tall_mail_contents = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['From', 'Date', 'Body'], dtype='object')"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "texts = df[\"Body\"].tolist()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>From</th>\n",
       "      <th>Date</th>\n",
       "      <th>Body</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>suh@eie.noSusanna Hjelset / EIE Frogner &amp; Aker...</td>\n",
       "      <td>Wed, 06 Mar 2024 21:52:55 +0100</td>\n",
       "      <td>Hei Eivind Kjosbakken.Takk for din interesse f...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>noreply@skatteetaten.no</td>\n",
       "      <td>Mon, 11 Mar 2024 12:32:48 +0100</td>\n",
       "      <td>Skattemeldingen din er nå tilgjengelig. Logg i...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>DNB &lt;noreply@info.dnb.no&gt;</td>\n",
       "      <td>Tue, 27 Feb 2024 11:35:35 +0100</td>\n",
       "      <td>DNBFå 20 - 30 % rabatt på kjente merkevarer\\r ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Digipost &lt;no-reply@digipost.no&gt;</td>\n",
       "      <td>Sat, 03 Feb 2024 10:04:05 +0000</td>\n",
       "      <td>Hei, Eivind\\r\\rDu har fått en ny melding i Dig...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>DNB &lt;noreply@info.dnb.no&gt;</td>\n",
       "      <td>Mon, 11 Mar 2024 21:43:10 +0100</td>\n",
       "      <td>DNBBli oppdatert på ett minutt. Se siste marke...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>no-reply@ntnu.no</td>\n",
       "      <td>Thu, 04 Jan 2024 12:08:09 +0100</td>\n",
       "      <td>Dette er et automatisk varsel om at vi nå har ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Gjensidige &lt;gjensidige.privat@gjensidige.no&gt;</td>\n",
       "      <td>Thu, 25 Jan 2024 19:12:43 +0100</td>\n",
       "      <td>Gjensidige Nytt år og nye tips og råd fra oss ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>DNB &lt;noreply@info.dnb.no&gt;</td>\n",
       "      <td>Tue, 12 Mar 2024 10:56:52 +0100</td>\n",
       "      <td>DNBFå 20 - 30 % rabatt på kjente merkevarer\\r ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>DNB &lt;noreply@info.dnb.no&gt;</td>\n",
       "      <td>Sun, 07 Jan 2024 13:00:11 +0100</td>\n",
       "      <td>DNBDu som er så god til å spare, bør ta en tit...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Microsoft Learn &lt;Learn@mails.microsoft.com&gt;</td>\n",
       "      <td>Tue, 13 Feb 2024 12:44:25 -0800</td>\n",
       "      <td>Skilling-XLifecycle-ModernCredentialsProjectSp...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Digipost &lt;no-reply@digipost.no&gt;</td>\n",
       "      <td>Sun, 07 Jan 2024 06:06:32 +0000</td>\n",
       "      <td>Hei, Eivind\\r\\rDu har fått en ny melding i Dig...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>no-reply@altinn.no</td>\n",
       "      <td>Sat, 10 Feb 2024 10:44:51 +0100</td>\n",
       "      <td>Innhold: Lønnsslipp20240210.  Logg på www.alti...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Revolut &lt;no-reply@revolut.com&gt;</td>\n",
       "      <td>Thu, 18 Jan 2024 09:14:59 +0000</td>\n",
       "      <td>Your documents are readyYour Statement of Fees...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>no-reply@ntnu.no</td>\n",
       "      <td>Mon, 08 Jan 2024 12:14:41 +0100</td>\n",
       "      <td>Dette er et automatisk varsel om at vi nå har ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>DNB &lt;noreply@info.dnb.no&gt;</td>\n",
       "      <td>Wed, 21 Feb 2024 15:17:34 +0100</td>\n",
       "      <td>DNBHa finansieringen i orden\\r  Se webversjon ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>DNB &lt;noreply@info.dnb.no&gt;</td>\n",
       "      <td>Mon, 12 Feb 2024 15:42:48 +0100</td>\n",
       "      <td>DNBBli oppdatert på ett minutt. Se siste marke...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Gjensidige &lt;gjensidige.privat@gjensidige.no&gt;</td>\n",
       "      <td>Thu, 18 Jan 2024 16:31:02 +0100</td>\n",
       "      <td>Kundevalget til generalforsamlingen i Gjensidi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Gjensidige &lt;gjensidige.privat@gjensidige.no&gt;</td>\n",
       "      <td>Wed, 17 Jan 2024 20:41:00 +0100</td>\n",
       "      <td>Kjenner du barn i alderen 9-12 år som spiller ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>service@paypal.com &lt;service@paypal.com&gt;</td>\n",
       "      <td>Tue, 05 Mar 2024 11:31:53 -0800</td>\n",
       "      <td>Pålogging fra en ny enhetEivind, vi har oppdag...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>DNB &lt;noreply@info.dnb.no&gt;</td>\n",
       "      <td>Thu, 18 Jan 2024 18:12:29 +0100</td>\n",
       "      <td>DNBBli oppdatert på ett minutt \\r  Se webversj...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 From  \\\n",
       "0   suh@eie.noSusanna Hjelset / EIE Frogner & Aker...   \n",
       "1                             noreply@skatteetaten.no   \n",
       "2                           DNB <noreply@info.dnb.no>   \n",
       "3                     Digipost <no-reply@digipost.no>   \n",
       "4                           DNB <noreply@info.dnb.no>   \n",
       "5                                    no-reply@ntnu.no   \n",
       "6        Gjensidige <gjensidige.privat@gjensidige.no>   \n",
       "7                           DNB <noreply@info.dnb.no>   \n",
       "8                           DNB <noreply@info.dnb.no>   \n",
       "9         Microsoft Learn <Learn@mails.microsoft.com>   \n",
       "10                    Digipost <no-reply@digipost.no>   \n",
       "11                                 no-reply@altinn.no   \n",
       "12                     Revolut <no-reply@revolut.com>   \n",
       "13                                   no-reply@ntnu.no   \n",
       "14                          DNB <noreply@info.dnb.no>   \n",
       "15                          DNB <noreply@info.dnb.no>   \n",
       "16       Gjensidige <gjensidige.privat@gjensidige.no>   \n",
       "17       Gjensidige <gjensidige.privat@gjensidige.no>   \n",
       "18            service@paypal.com <service@paypal.com>   \n",
       "19                          DNB <noreply@info.dnb.no>   \n",
       "\n",
       "                               Date  \\\n",
       "0   Wed, 06 Mar 2024 21:52:55 +0100   \n",
       "1   Mon, 11 Mar 2024 12:32:48 +0100   \n",
       "2   Tue, 27 Feb 2024 11:35:35 +0100   \n",
       "3   Sat, 03 Feb 2024 10:04:05 +0000   \n",
       "4   Mon, 11 Mar 2024 21:43:10 +0100   \n",
       "5   Thu, 04 Jan 2024 12:08:09 +0100   \n",
       "6   Thu, 25 Jan 2024 19:12:43 +0100   \n",
       "7   Tue, 12 Mar 2024 10:56:52 +0100   \n",
       "8   Sun, 07 Jan 2024 13:00:11 +0100   \n",
       "9   Tue, 13 Feb 2024 12:44:25 -0800   \n",
       "10  Sun, 07 Jan 2024 06:06:32 +0000   \n",
       "11  Sat, 10 Feb 2024 10:44:51 +0100   \n",
       "12  Thu, 18 Jan 2024 09:14:59 +0000   \n",
       "13  Mon, 08 Jan 2024 12:14:41 +0100   \n",
       "14  Wed, 21 Feb 2024 15:17:34 +0100   \n",
       "15  Mon, 12 Feb 2024 15:42:48 +0100   \n",
       "16  Thu, 18 Jan 2024 16:31:02 +0100   \n",
       "17  Wed, 17 Jan 2024 20:41:00 +0100   \n",
       "18  Tue, 05 Mar 2024 11:31:53 -0800   \n",
       "19  Thu, 18 Jan 2024 18:12:29 +0100   \n",
       "\n",
       "                                                 Body  \n",
       "0   Hei Eivind Kjosbakken.Takk for din interesse f...  \n",
       "1   Skattemeldingen din er nå tilgjengelig. Logg i...  \n",
       "2   DNBFå 20 - 30 % rabatt på kjente merkevarer\\r ...  \n",
       "3   Hei, Eivind\\r\\rDu har fått en ny melding i Dig...  \n",
       "4   DNBBli oppdatert på ett minutt. Se siste marke...  \n",
       "5   Dette er et automatisk varsel om at vi nå har ...  \n",
       "6   Gjensidige Nytt år og nye tips og råd fra oss ...  \n",
       "7   DNBFå 20 - 30 % rabatt på kjente merkevarer\\r ...  \n",
       "8   DNBDu som er så god til å spare, bør ta en tit...  \n",
       "9   Skilling-XLifecycle-ModernCredentialsProjectSp...  \n",
       "10  Hei, Eivind\\r\\rDu har fått en ny melding i Dig...  \n",
       "11  Innhold: Lønnsslipp20240210.  Logg på www.alti...  \n",
       "12  Your documents are readyYour Statement of Fees...  \n",
       "13  Dette er et automatisk varsel om at vi nå har ...  \n",
       "14  DNBHa finansieringen i orden\\r  Se webversjon ...  \n",
       "15  DNBBli oppdatert på ett minutt. Se siste marke...  \n",
       "16  Kundevalget til generalforsamlingen i Gjensidi...  \n",
       "17  Kjenner du barn i alderen 9-12 år som spiller ...  \n",
       "18  Pålogging fra en ny enhetEivind, vi har oppdag...  \n",
       "19  DNBBli oppdatert på ett minutt \\r  Se webversj...  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Skilling-XLifecycle-ModernCredentialsProjectSpike-1-A-EM-ProEarn your Microsoft Applied Skills credential to demonstrate your proficiency.\\r͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0 ͏ \\u200c \\xa0\\r\\r         \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\r         \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\r         \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0 \\xa0\\r                                                          View as a\\xa0web page\\r                                                \\xa0\\xa0\\xa0 Become indispensable with Microsoft Applied Skills \\xa0Learn more\\xa0\\xa0\\xa0\\xa0Make a bigger impact on your projects, your organisation – and your career. Demonstrate your proficiency in skills that showcase your ability to handle highly technical scenarios and take on projects that are critical to organisational success.See how Microsoft Applied Skills credentials empower you to:Learn by doing and validate your skills in real time through interactive lab‑based assessments.Acquire the skills you need to broaden your career opportunities in a way that works best for you.Showcase your Microsoft‑verified skills on LinkedIn and by sharing your credentials with organisations.\\xa0\\xa0\\xa0This email was sent from an unmonitored inbox.\\r\\xa0Unsubscribe | Privacy Statement\\xa0Microsoft Corporation,\\xa0One Microsoft Way,\\xa0Redmond, WA 98052'"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "texts[9]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
